library(tidyverse)
library(cowplot)
library(GGally)
library(heatmaply)
library(sva)
library(limma)
library(broom)
library(ggridges)
# Cells #
vpa.cell.neg.raw <- read_csv("./data/abundances/vpa_1and2_cells_target_negmode.csv")
vpa.cell.pos.raw <- read_csv("./data/abundances/vpa_1and2_cells_target_posmode.csv")
# Media #
vpa.med.neg.raw <- read_csv("./data/abundances/vpa_1and2_media_target_negmode.csv")
vpa.med.pos.raw <- read_csv("./data/abundances/vpa_1and2_media_target_posmode.csv")
# Cells #
vpa.cell.neg.compound.info <- read_csv("./data/compound_info/vpa_1and2_cells_target_negmode_cmpnd_info.csv")
vpa.cell.pos.compound.info <- read_csv("./data/compound_info/vpa_1and2_cells_target_posmode_cmpnd_info.csv")
# Media #
vpa.med.neg.compound.info <- read_csv("./data/compound_info/vpa_1and2_media_target_negmode_cmpnd_info.csv")
vpa.med.pos.compound.info <- read_csv("./data/compound_info/vpa_1and2_media_target_posmode_cmpnd_info.csv")
# Kegg/other ID reference #
cmpnd.id.db <- read_csv("./data/other/anp_db_compound_kegg.csv")
# run order and protein quant (cell samples)
# although run order is the same for media samples
vpa.cell.other.info <- read_csv("./data/other/vpa_exp1_other_info.csv")
MissingPerSamplePlot <- function(raw.data, start.string) {
# Counts the number of missing/NA values per sample and
# percent compounds missing out of total number of compounds per sample
# Then passes the result into a vertical bar plot, where each
# bar represents a single sample and the size of the bar
# is the % of compounds missing
counted.na <- raw.data %>%
select(starts_with(start.string)) %>%
mutate(
count.na = apply(., 1, function(x) sum(is.na(x))),
percent.na = (count.na / ncol(raw.data %>% select(starts_with(start.string)))) * 100
) %>%
dplyr::select(count.na, percent.na) %>%
bind_cols(
raw.data %>%
select(Samples, Group)
) %>%
arrange(percent.na) %>%
mutate(f.order = factor(Samples, levels = Samples))
counted.na %>%
ggplot(aes(x = f.order, y = percent.na, fill = Group)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 20, color = "gray", size = 1, alpha = 0.8) +
coord_flip()+
xlab("Samples") +
ylab("Percent missing values in sample") +
theme(axis.text.y = element_text(size = 6))
}
MissingPerCompound <- function(raw.data, start.string){
# Function to count in how many experimental samples each compound is missing
# Also calculates the % missing:
# (# NA per compound across all experimental samples) * 100 / (tot num of samples)
raw.data %>%
filter(Group == "sample") %>%
select(Samples, starts_with(start.string)) %>%
gather(key = "Compound", value = "Abundance", -Samples) %>%
group_by(Compound) %>%
summarise(
na_count = sum(is.na(Abundance)),
n_samples = n(),
percent_na = (na_count * 100 / n_samples)
) %>%
filter(na_count > 0) %>%
arrange(desc(percent_na))
}
ReplaceNAwMinLogTransformSingle <- function(raw.dataframe, start.prefix) {
# Function to replace any NAs in each column with the minimum for that column,
# separately for each sample type.
# NA in negative control samples are replaced by 2.
# Then data is log2 transformed
smpls <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(starts_with(start.prefix))
smpls.min <- lapply(smpls, min, na.rm = TRUE)
# replace the missing values in the real samples with the minimum of the samples
# then take the log
smpls.noNA <- raw.dataframe %>%
filter(Group == "sample") %>%
dplyr::select(Samples:Experiment) %>%
bind_cols(
smpls %>%
replace_na(replace = smpls.min) %>%
log2()
)
# QC
QC <- raw.dataframe %>%
filter(Group == "QC") %>%
dplyr::select(starts_with(start.prefix))
QC.min <- lapply(QC, min, na.rm = TRUE)
# replace the missing values in the QC with the minimum of the QC
# then take the log
QC.noNA <- raw.dataframe %>%
filter(Group == "QC") %>%
dplyr::select(Samples:Experiment) %>%
bind_cols(
QC %>%
replace_na(replace = QC.min) %>%
log2()
)
# replace the missing values in solv and empty samples with 2 - for PCA analysis
# then take the log
other.min <- setNames(
as.list(
rep(2, ncol(
raw.dataframe %>%
dplyr::select(starts_with(start.prefix))))
),
colnames(raw.dataframe %>% dplyr::select(starts_with(start.prefix)))
)
other.num.log <- raw.dataframe %>%
filter(Group != "sample" & Group != "QC") %>%
dplyr::select(Samples:Experiment) %>%
bind_cols(
raw.dataframe %>%
filter(Group != "sample" & Group != "QC") %>%
dplyr::select(starts_with(start.prefix)) %>%
replace_na(replace = other.min) %>%
log2()
)
# combine them together back into one data frame
all.noNA <- smpls.noNA %>%
bind_rows(QC.noNA) %>%
bind_rows(other.num.log)
}
HeatmapPrepAlt <- function(raw.data, start.prefix){
# function for preparing dara for heatmap viz
x <- raw.data %>%
select(starts_with(start.prefix)) %>%
scale(center = TRUE, scale = TRUE) %>%
as.matrix()
row.names(x) <- raw.data$Samples
return(x)
}
Q: What are the distributions of compound masses and retention times?
# bind all 4 compound info df into 1
full.vpa.cmpnd <- vpa.cell.neg.compound.info %>%
mutate(Set = "Cells / Neg") %>%
bind_rows(vpa.cell.pos.compound.info %>% mutate(Set = "Cells / Pos")) %>%
bind_rows(vpa.med.neg.compound.info %>% mutate(Set = "Media / Neg")) %>%
bind_rows(vpa.med.pos.compound.info %>% mutate(Set = "Media / Pos"))
full.vpa.cmpnd %>%
ggplot(aes(x = rt, y = mass)) +
geom_point(size = 3, alpha = 0.3) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA Only Exp") +
ylim(0, 1100)
full.vpa.cmpnd %>%
ggplot(aes(x = rt, y = mass, color = Set)) +
geom_point(size = 3, alpha = 0.5) +
xlab("Retention Time (min)") +
ylab("Mass (Da)") +
ggtitle("Mass v RT\nVPA Only Exp") +
facet_wrap(~ Set) +
ylim(0, 1100)
The compounds included in the Cells / Neg mode and Cells / Pos mode datasets loook to have a good spread of masses and retention times within the expected RT window of 0 to 29min and mass window of 50 to 1000Da. A smaller number of compounds were detected in the media samples, and the larger molecules that were detected in cells were not detected in media. This is not a surprising result, media in general is expected to have a lower number of compounds than the cells themselves, as cells produce a wide variety of molecules that are not exported to the media.
Q: Which compounds were found in one or more of the data types?
### vpa cell join ###
vpa.cell.cmpnd.join <- vpa.cell.neg.compound.info %>%
inner_join(vpa.cell.pos.compound.info, by = "cas_id", suffix = c(".c.n", ".c.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / cells
print(vpa.cell.cmpnd.join$compound_full.c.n)
[1] "Glycine"
[2] "Acetoin"
[3] "Alanine"
[4] "Sarcosine"
[5] "Beta-Alanine"
[6] "2-Aminobutyric Acid"
[7] "BAIBA"
[8] "Serine"
[9] "Hypotaurine"
[10] "Uracil"
[11] "Creatinine"
[12] "5,6-Dihydrouracil"
[13] "Proline"
[14] "Valine"
[15] "Threonine"
[16] "Taurine"
[17] "4-Oxoproline"
[18] "Ketoleucine"
[19] "trans-4-Hydroxyproline"
[20] "Creatine"
[21] "Isoleucine"
[22] "Leucine"
[23] "Asparagine"
[24] "5-Hydroxyhexanoic Acid"
[25] "Aspartic Acid"
[26] "Adenine"
[27] "Hypoxanthine"
[28] "O-Phosphoethanolamine"
[29] "Glutamine"
[30] "Lysine"
[31] "N-Acetylserine"
[32] "Glutamic Acid"
[33] "Methionine"
[34] "D-Ribose"
[35] "Guanine"
[36] "Xanthine"
[37] "3-Sulfinoalanine"
[38] "Histidine"
[39] "Orotic Acid"
[40] "Allantoin"
[41] "Fucose"
[42] "Phenylalanine"
[43] "Pyridoxal"
[44] "Cysteic Acid"
[45] "Pyridoxine"
[46] "3-Methylhistidine"
[47] "G3P"
[48] "Glycerol 1-Phosphate / Glycerol 3-Phosphate"
[49] "N-Carbamoyl-L-aspartic Acid"
[50] "Tyrosine"
[51] "D-Galactitol"
[52] "Phosphocholine"
[53] "N-alpha-Acetylglutamine"
[54] "Tryptophan"
[55] "Phosphocreatine"
[56] "Glycerylphosphorylethanolamine"
[57] "O-Succinylhomoserine"
[58] "Pantothenic Acid"
[59] "GlcNAc"
[60] "Cystathionine"
[61] "Deoxycytidine"
[62] "Cytidine"
[63] "Uridine"
[64] "Palmitoleic Acid"
[65] "Glycerol 1-phosphoserine"
[66] "D-Glucose 6-phosphate"
[67] "Thiamine"
[68] "Inosine"
[69] "Myoinositol-methyl-phosphate"
[70] "Linoleic Acid"
[71] "Guanosine"
[72] "Xanthosine"
[73] "L-Arginosuccinic Acid"
[74] "Ricinoleic Acid"
[75] "Glutathione"
[76] "11Z,14Z-Eicosadienoic Acid (20:2 n-6)"
[77] "CMP"
[78] "UMP"
[79] "3-Phosphoglyceroinositol"
[80] "AMP"
[81] "GMP"
[82] "SAH"
[83] "CDP"
[84] "ADP"
[85] "GDP"
[86] "LysoPE(18:2)"
[87] "LysoPE(18:0)"
[88] "dTTP"
[89] "CTP"
[90] "UTP"
[91] "CDP-Choline"
[92] "dATP"
[93] "ATP"
[94] "GTP"
[95] "ADP-Ribose"
[96] "UDP-Galactose"
[97] "UDP-Glucose"
[98] "UDP-Glucuronic acid"
[99] "ADP-Glucose"
[100] "GDP-Glucose"
[101] "UDP-GalNAc"
[102] "UDP-GlcNAc"
[103] "GSSG"
[104] "NAD"
[105] "NADH"
[106] "NADP"
[107] "PC(36:4)"
[108] "FAD"
[109] "Acetyl-CoA"
# percent of cell / neg compounds found in cell / pos
round(nrow(vpa.cell.cmpnd.join) * 100 / nrow(vpa.cell.neg.compound.info), 1)
[1] 51.7
# percent of cell / neg compounds found in cell / pos
round(nrow(vpa.cell.cmpnd.join) * 100 / nrow(vpa.cell.pos.compound.info), 1)
[1] 61.9
### vpa media join ###
vpa.med.cmpnd.join <- vpa.med.neg.compound.info %>%
inner_join(vpa.med.pos.compound.info, by = "cas_id", suffix = c(".m.n", ".m.p")) %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
# compound names - found in pos and neg mode / media
print(vpa.med.cmpnd.join$compound_full.m.n)
[1] "Alanine" "Serine"
[3] "Uracil" "Valine"
[5] "Threonine" "Taurine"
[7] "Thymine" "4-Oxoproline"
[9] "Isoleucine" "Leucine"
[11] "Glutamine" "Methionine"
[13] "Xanthine" "Histidine"
[15] "Allantoin" "Phenylalanine"
[17] "Uric Acid" "D-Glucose"
[19] "Tyrosine" "D-Galactitol"
[21] "Cysteine-S-sulfonic Acid" "Tryptophan"
[23] "Pantothenic Acid" "Cytidine"
[25] "Uridine" "Inosine"
[27] "Guanosine" "Folic Acid"
# percent of media / neg compounds found in media / pos
round(nrow(vpa.med.cmpnd.join) * 100 / nrow(vpa.med.neg.compound.info), 1)
[1] 51.9
# percent of media / pos compounds found in media / neg
round(nrow(vpa.med.cmpnd.join) * 100 / nrow(vpa.med.pos.compound.info), 1)
[1] 50
### vpa all modes join ###
vpa.all.cmpnd.join <- vpa.cell.cmpnd.join %>%
inner_join(vpa.med.cmpnd.join, by = "cas_id") %>%
select(
contains("cas_id"), contains("short"),
contains("full"), starts_with("formula"),
starts_with("mass"), starts_with("rt")
)
nrow(vpa.all.cmpnd.join)
[1] 23
# check for any mass issues between all 4 modes
vpa.all.cmpnd.join %>%
select(contains("mass")) %>%
ggpairs()
# any rt inconsistencies?
vpa.all.cmpnd.join %>%
select(starts_with("rt")) %>%
ggpairs()
Q: Do any of the samples have greater than 20% missing (NA) compound abundances, out of all of the features in the dataset?
MissingPerSamplePlot(vpa.cell.neg.raw, "VPAnC") +
ggtitle("Missing Per Sample\nVPA-only / Cells / Neg Mode")
MissingPerSamplePlot(vpa.cell.pos.raw, "VPApC") +
ggtitle("Missing Per Sample\nVPA-only / Cells / Pos Mode")
MissingPerSamplePlot(vpa.med.neg.raw, "VPAnM") +
ggtitle("Missing Per Sample\nVPA-only / Media / Neg Mode")
MissingPerSamplePlot(vpa.med.pos.raw, "VPApM") +
ggtitle("Missing Per Sample\nVPA-only / Media / Pos Mode")
A: No, sample files across both datasets have very few missing values. The green-colored bars, marked “sample” are the actual experimental samples. Whereas the “solv”, or “solvent”, and “empty” samples are negative control samples that are expected to have many missing values and/or low compound abundances. They will be used to narrow down the list of features later on in the analysis.
Q: Are any of the compounds more than 20% missing in the experimental sample group? If there are any, they will be excluded from analysis.
Note: The MissingPerCompound function considers only the Group == "sample" samples.
MissingPerCompound(vpa.cell.neg.raw, "VPAnC") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
(vpa.cell.pos.cmpnd.to.excl<- MissingPerCompound(vpa.cell.pos.raw, "VPApC") %>%
filter(percent_na > 20))
# A tibble: 1 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPApC110 9 23 39.1
MissingPerCompound(vpa.med.neg.raw, "VPAnM") %>%
filter(percent_na > 20)
# A tibble: 0 x 4
# ... with 4 variables: Compound <chr>, na_count <int>, n_samples <int>,
# percent_na <dbl>
(vpa.med.pos.cmpnd.to.excl <- MissingPerCompound(vpa.med.pos.raw, "VPApM") %>%
filter(percent_na > 20))
# A tibble: 1 x 4
Compound na_count n_samples percent_na
<chr> <int> <int> <dbl>
1 VPApM34 6 24 25
A: No compounds need to be excluded from the Cell / Neg Mode and Media / Neg Mode datasets, but 1 compound each will be excluded from the Cell / Pos Mode and Media / Pos Mode datasets.
# get the mean abundance of each compound, grouped by solvent vs empty sample vs experimental sample
vpa.cell.neg.raw.grp.mean <- vpa.cell.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPAnC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
# plot the log2 density distribution of the means
vpa.cell.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Cells / Negative Mode\nGrouped by sample type")
# for plotting purposes, to make the plot neater
vpa.cell.neg.raw.grp.mean.order <- vpa.cell.neg.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vpa.cell.neg.raw %>%
select(Samples, Group, starts_with("VPAnC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Cells / Negative Mode")
# compound mean by group only, ordered by increasing abundance in the experimental samples
vpa.cell.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA-only / Cells / Negative Mode")
vpa.cell.neg.raw.grp.diff <- vpa.cell.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 50)
vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
vpa.cell.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-3, 13.5) +
ylim(-3, 13.5) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Cells / Neg Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
vpa.cell.neg.cmpnd.to.incl <- vpa.cell.neg.raw.grp.diff %>%
filter((smpl_solv_diff > 2.5 & smpl_empty_diff > 2.5) | is.na(smpl_solv_diff) | is.na(smpl_empty_diff))
# how many compound were there in the raw negative mode dataset?
nrow(vpa.cell.neg.raw.grp.diff)
[1] 211
# how many compounds are retained for further analysis?
nrow(vpa.cell.neg.cmpnd.to.incl)
[1] 179
vpa.cell.pos.raw.grp.mean <- vpa.cell.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPApC")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vpa.cell.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Cells / Positive Mode\nGrouped by sample type")
vpa.cell.pos.raw.grp.mean.order <- vpa.cell.pos.raw.grp.mean %>%
filter(Group == "sample") %>%
arrange(Grp_mean_abun)
vpa.cell.pos.raw %>%
select(Samples, Group, starts_with("VPApC")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.cell.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)") +
ggtitle("Profile Plot of compound means by sample type only\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.raw.grp.diff <- vpa.cell.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 50)
vpa.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 50)
vpa.cell.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-1, 15) +
ylim(-1, 15) +
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Cells / Pos Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv or empty)
vpa.cell.pos.cmpnd.to.incl <- vpa.cell.pos.raw.grp.diff %>%
filter((smpl_solv_diff > 2.5 & smpl_empty_diff > 2.5) | is.na(smpl_solv_diff) | is.na(smpl_empty_diff)) %>%
filter(!(Compound %in% vpa.cell.pos.cmpnd.to.excl$Compound))
# how many compound were there in the raw dataset?
nrow(vpa.cell.pos.raw.grp.diff)
[1] 176
# how many compounds are retained for further analysis?
nrow(vpa.cell.pos.cmpnd.to.incl)
[1] 142
vpa.med.neg.raw.grp.mean <- vpa.med.neg.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPAnM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vpa.med.neg.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Media / Negative Mode\nGrouped by sample type")
vpa.med.neg.raw.grp.mean.order <- vpa.med.neg.raw.grp.mean %>%
filter(Group == "empty") %>%
arrange(Grp_mean_abun)
vpa.med.neg.raw %>%
select(Samples, Group, starts_with("VPAnM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1.5) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.neg.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Media / Negative Mode")
vpa.med.neg.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.neg.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)")
vpa.med.neg.raw.grp.diff <- vpa.med.neg.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 25)
vpa.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
vpa.med.neg.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-4, 4) +
ylim(-1, 4) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Media / Neg Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vpa.med.neg.cmpnd.to.incl <- vpa.med.neg.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff))
# how many compound were there in the raw dataset?
nrow(vpa.med.neg.raw.grp.diff)
[1] 54
# how many compounds are retained for further analysis?
nrow(vpa.med.neg.cmpnd.to.incl)
[1] 41
vpa.med.pos.raw.grp.mean <- vpa.med.pos.raw %>%
group_by(Group) %>%
summarize_at(vars(matches("VPApM")), mean, na.rm = TRUE) %>%
gather(key = "Compound", value = "Grp_mean_abun", -Group)
vpa.med.pos.raw.grp.mean %>%
ggplot(aes(log2(Grp_mean_abun), color = Group)) +
geom_density(size = 2, alpha = 0.8) +
ggtitle("Distribution of compound means\nVPA-only / Media / Positive Mode\nGrouped by sample type")
vpa.med.pos.raw.grp.mean.order <- vpa.med.pos.raw.grp.mean %>%
filter(Group == "empty") %>%
arrange(Grp_mean_abun)
vpa.med.pos.raw %>%
select(Samples, Group, starts_with("VPApM")) %>%
gather("Compound", value = "Abundance", -c(Samples, Group)) %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Abundance), color = Group, group = Samples)) +
geom_line(alpha = 0.1, size = 1.5) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
# overlay the group averages
geom_line(
data = vpa.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.pos.raw.grp.mean.order$Compound)),
aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group),
size = 0.5
) +
ggtitle("Profile Plot of all compound abundances\nWith average per sample type overlaid\nVPA-only / Media / Positive Mode")
vpa.med.pos.raw.grp.mean %>%
mutate(Cmpnd_sort = factor(Compound, levels = vpa.med.pos.raw.grp.mean.order$Compound)) %>%
ggplot(aes(Cmpnd_sort, log2(Grp_mean_abun), color = Group, group = Group)) +
geom_point(size = 1, alpha = 0.8) +
geom_line(alpha = 0.8) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
xlab("Compound") +
ylab("log2(Sample Type Mean)")
vpa.med.pos.raw.grp.diff <- vpa.med.pos.raw.grp.mean %>%
spread(Group, Grp_mean_abun) %>%
mutate(
smpl_empty_diff = sample / empty,
smpl_solv_diff = sample / solv,
empty_solv_diff = empty / solv
)
vpa.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff))) +
geom_histogram(bins = 25)
vpa.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_solv_diff))) +
geom_histogram(bins = 25)
vpa.med.pos.raw.grp.diff %>%
ggplot(aes(log2(smpl_empty_diff), log2(smpl_solv_diff))) +
geom_point(size = 2, alpha = 0.5) +
xlim(-6, 2) +
ylim(-1, 7) +
# abline in pink
geom_abline(intercept = 0, slope = 1, color = "#CC79A7", size = 2, alpha = 0.6) +
# lm line in green
geom_smooth(method = "lm", color = "#009E73", size = 2, alpha = 0.6, se = FALSE) +
xlab("log2([Sample Compound Mean] / [Empty Compound Mean])") +
ylab("log2([Sample Compound Mean] / [Solvent Compound Mean])") +
ggtitle("Background signal\nRaw Data / Media / pos Mode")
# include compounds with FC > 2.5 or FC is NA (indication of NA in solv)
vpa.med.pos.cmpnd.to.incl <- vpa.med.pos.raw.grp.diff %>%
filter(smpl_solv_diff > 2.5 | is.na(smpl_solv_diff)) %>%
filter(!(Compound %in% vpa.med.pos.cmpnd.to.excl$Compound))
# how many compound were there in the raw dataset?
nrow(vpa.med.pos.raw.grp.diff)
[1] 56
# how many compounds are retained for further analysis?
nrow(vpa.med.pos.cmpnd.to.incl)
[1] 36
# replace missing with minimum for each Group in each dataset and log2() transform the data:
# exclude compound that have a >20% NA count across samples
vpa.cell.neg.noNA <- vpa.cell.neg.raw %>%
select(Samples:Experiment, one_of(vpa.cell.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPAnC")
vpa.cell.pos.noNA <- vpa.cell.pos.raw %>%
select(Samples:Experiment, one_of(vpa.cell.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPApC")
vpa.med.neg.noNA <- vpa.med.neg.raw %>%
select(Samples:Experiment, one_of(vpa.med.neg.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPAnM")
vpa.med.pos.noNA <- vpa.med.pos.raw %>%
select(Samples:Experiment, one_of(vpa.med.pos.cmpnd.to.incl$Compound)) %>%
ReplaceNAwMinLogTransformSingle("VPApM")
vpa.cell.neg.noNA.gathered <- vpa.cell.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.cell.neg.noNA) == "VPAnC1"):ncol(vpa.cell.neg.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.cell.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Cells / Negative Mode")
# same data format, but as ridge plots
vpa.cell.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Cells / Negative Mode")
# experimental samples only
vpa.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Negative Mode")
# overlay the distributions for another look
vpa.cell.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Negative Mode")
# relationship with protein quant and run order?
vpa.cell.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.cell.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
vpa.cell.pos.noNA.gathered <- vpa.cell.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.cell.pos.noNA) == "VPApC1"):ncol(vpa.cell.pos.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.cell.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Cells / Positive Mode")
# same data format, but as ridge plots
vpa.cell.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Cells / Positive Mode")
# experimental samples only
vpa.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Positive Mode")
# overlay the distributions for another look
vpa.cell.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Cells / Positive Mode")
# relationship with protein quant and run order?
vpa.cell.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.cell.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
vpa.med.neg.noNA.gathered <- vpa.med.neg.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.med.neg.noNA) == "VPAnM1"):ncol(vpa.med.neg.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.med.neg.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Media / Negative Mode")
# same data format, but as ridge plots
vpa.med.neg.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Media / Negative Mode")
# experimental samples only
vpa.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Media / Negative Mode")
# overlay the distributions for another look
vpa.med.neg.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Media / Negative Mode")
# relationship with protein quant and run order?
vpa.med.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.med.neg.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
vpa.med.pos.noNA.gathered <- vpa.med.pos.noNA %>%
gather(
key = "Metabolite", "Abundance",
which(colnames(vpa.med.pos.noNA) == "VPApM1"):ncol(vpa.med.pos.noNA)
)
# plot all abundances in a sample, grouped by sample as a boxplot
vpa.med.pos.noNA.gathered %>%
ggplot(aes(Samples, Abundance, fill = Group)) +
geom_boxplot() +
geom_boxplot(aes(color = Group), fatten = NULL, fill = NA, coef = 0, outlier.alpha = 0, show.legend = FALSE) +
theme(axis.text.x = element_text(angle = 90)) +
ylab("log2(Abundance)") +
ggtitle("Boxplot of compound abundances\nAll samples\nVPA-only / Media / Positive Mode")
# same data format, but as ridge plots
vpa.med.pos.noNA.gathered %>%
ggplot(aes(y = Samples, x = Abundance, fill = Group)) +
geom_density_ridges(scale = 15) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nAll samples\nVPA-only / Media / Positive Mode")
# experimental samples only
vpa.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(y = Samples, x = Abundance, fill = Treatment)) +
geom_density_ridges(scale = 10) +
theme_ridges() +
scale_y_discrete(expand = c(0.01, 0)) +
ggtitle("Ridge plot of compound abundances\nExperimental samples only\nVPA-only / Media / Positive Mode")
# overlay the distributions for another look
vpa.med.pos.noNA.gathered %>%
filter(Group == "sample") %>%
ggplot(aes(Abundance, group = Samples, color = Treatment)) +
geom_density(alpha = 0.8, size = 0.75) +
xlab("log2(Abundance)") +
ggtitle("Density plot of compound abundances\nExperimental samples only\nVPA-only / Media / Positive Mode")
vpa.med.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(run_order, Abundance)) +
geom_jitter(width = 0.1, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(method = "lm", color = "black", alpha = 0.8, se = FALSE)
vpa.med.pos.noNA.gathered %>%
inner_join(vpa.cell.other.info, by = "Samples") %>%
ggplot(aes(prot_conc_ug_uL, Abundance)) +
geom_jitter(width = 0.005, size = 1, alpha = 0.6, aes(color = Treatment)) +
geom_smooth(color = "black", alpha = 0.8, se = FALSE)
Some plots to understand the relationship between the samples, QC samples, solvent, and empty samples in some cases.
### PCA on all Samples ###
vpa.cell.neg.full.pca <- vpa.cell.neg.noNA %>%
select(starts_with("VPAnC")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.cell.neg.full.pca$sdev ^ 2) * 100 / sum(vpa.cell.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Cells / Negative Mode",
type = "b"
)
vpa.cell.neg.full.pca.x <- as.data.frame(vpa.cell.neg.full.pca$x)
row.names(vpa.cell.neg.full.pca.x) <- vpa.cell.neg.noNA$Samples
vpa.cell.neg.full.pca.x <- vpa.cell.neg.full.pca.x %>%
bind_cols(vpa.cell.neg.noNA %>% select(Group:Experiment))
vpa.cell.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (90.5% Var)") +
ylab("PC2 (3.1%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Cells / Negative Mode")
### Samples and QC ###
vpa.cell.neg.smpl.QC.pca <- vpa.cell.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.neg.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.cell.neg.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Cells / Negative Mode",
type = "b"
)
vpa.cell.neg.smpl.QC.pca.x <- as.data.frame(vpa.cell.neg.smpl.QC.pca$x)
vpa.cell.neg.smpl.QC.pca.x <- vpa.cell.neg.smpl.QC.pca.x %>%
bind_cols(
vpa.cell.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.neg.smpl.QC.pca.x) <- vpa.cell.neg.smpl.QC.pca.x$Samples
vpa.cell.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (35.1% Var)") +
ylab("PC2 (19.7%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Negative Mode")
vpa.cell.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (16.1% Var)") +
ylab("PC4 (6.4%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Negative Mode")
### Experimental Samples Only ###
vpa.cell.neg.smpl.pca <- vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.neg.smpl.pca$sdev ^ 2) * 100 / sum(vpa.cell.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Cells / Negative Mode",
type = "b"
)
vpa.cell.neg.smpl.pca.x <- as.data.frame(vpa.cell.neg.smpl.pca$x)
vpa.cell.neg.smpl.pca.x <- vpa.cell.neg.smpl.pca.x %>%
bind_cols(
vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.neg.smpl.pca.x) <- vpa.cell.neg.smpl.pca.x$Samples
vpa.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (34.3% Var)") +
ylab("PC2 (23.8%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Negative Mode")
vpa.cell.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (16.6% Var)") +
ylab("PC4 (7.1%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Negative Mode")
### PCA on all Samples ###
vpa.cell.pos.full.pca <- vpa.cell.pos.noNA %>%
select(starts_with("VPApC")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.cell.pos.full.pca$sdev ^ 2) * 100 / sum(vpa.cell.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Cells / Positive Mode",
type = "b"
)
vpa.cell.pos.full.pca.x <- as.data.frame(vpa.cell.pos.full.pca$x)
row.names(vpa.cell.pos.full.pca.x) <- vpa.cell.pos.noNA$Samples
vpa.cell.pos.full.pca.x <- vpa.cell.pos.full.pca.x %>%
bind_cols(vpa.cell.pos.noNA %>% select(Group:Experiment))
vpa.cell.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (90.0% Var)") +
ylab("PC2 (4.2%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Cells / Positive Mode")
### Samples and QC ###
vpa.cell.pos.smpl.QC.pca <- vpa.cell.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.pos.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.cell.pos.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Cells / Positive Mode",
type = "b"
)
vpa.cell.pos.smpl.QC.pca.x <- as.data.frame(vpa.cell.pos.smpl.QC.pca$x)
vpa.cell.pos.smpl.QC.pca.x <- vpa.cell.pos.smpl.QC.pca.x %>%
bind_cols(
vpa.cell.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.pos.smpl.QC.pca.x) <- vpa.cell.pos.smpl.QC.pca.x$Samples
vpa.cell.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (55.0% Var)") +
ylab("PC2 (15.0%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (11.4% Var)") +
ylab("PC4 (5.0%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Cells / Positive Mode")
### Experimental Samples Only ###
vpa.cell.pos.smpl.pca <- vpa.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.cell.pos.smpl.pca$sdev ^ 2) * 100 / sum(vpa.cell.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Cells / Positive Mode",
type = "b"
)
vpa.cell.pos.smpl.pca.x <- as.data.frame(vpa.cell.pos.smpl.pca$x)
vpa.cell.pos.smpl.pca.x <- vpa.cell.pos.smpl.pca.x %>%
bind_cols(
vpa.cell.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.cell.pos.smpl.pca.x) <- vpa.cell.pos.smpl.pca.x$Samples
vpa.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (57.5% Var)") +
ylab("PC2 (19.6%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Positive Mode")
vpa.cell.pos.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (7.2% Var)") +
ylab("PC4 (4.7%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Cells / Positive Mode")
### PCA on all Samples ###
vpa.med.neg.full.pca <- vpa.med.neg.noNA %>%
select(starts_with("VPAnM")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.med.neg.full.pca$sdev ^ 2) * 100 / sum(vpa.med.neg.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Media / Negative Mode",
type = "b"
)
vpa.med.neg.full.pca.x <- as.data.frame(vpa.med.neg.full.pca$x)
row.names(vpa.med.neg.full.pca.x) <- vpa.med.neg.noNA$Samples
vpa.med.neg.full.pca.x <- vpa.med.neg.full.pca.x %>%
bind_cols(vpa.med.neg.noNA %>% select(Group:Experiment))
vpa.med.neg.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group, shape = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (75.8% Var)") +
ylab("PC2 (8.4%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Media / Negative Mode")
### Samples and QC ###
vpa.med.neg.smpl.QC.pca <- vpa.med.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.neg.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.med.neg.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Media / Negative Mode",
type = "b"
)
vpa.med.neg.smpl.QC.pca.x <- as.data.frame(vpa.med.neg.smpl.QC.pca$x)
vpa.med.neg.smpl.QC.pca.x <- vpa.med.neg.smpl.QC.pca.x %>%
bind_cols(
vpa.med.neg.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.neg.smpl.QC.pca.x) <- vpa.med.neg.smpl.QC.pca.x$Samples
vpa.med.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (56.8% Var)") +
ylab("PC2 (25.0%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Negative Mode")
vpa.med.neg.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (9.0% Var)") +
ylab("PC4 (4.1%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Negative Mode")
### Experimental Samples Only ###
vpa.med.neg.smpl.pca <- vpa.med.neg.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.neg.smpl.pca$sdev ^ 2) * 100 / sum(vpa.med.neg.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Media / Negative Mode",
type = "b"
)
vpa.med.neg.smpl.pca.x <- as.data.frame(vpa.med.neg.smpl.pca$x)
vpa.med.neg.smpl.pca.x <- vpa.med.neg.smpl.pca.x %>%
bind_cols(
vpa.med.neg.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.neg.smpl.pca.x) <- vpa.med.neg.smpl.pca.x$Samples
vpa.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (58.2% Var)") +
ylab("PC2 (27.5%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Media / Negative Mode")
vpa.med.neg.smpl.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment, shape = Experiment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.3% Var)") +
ylab("PC4 (2.4%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Media / Negative Mode")
### PCA on all Samples ###
vpa.med.pos.full.pca <- vpa.med.pos.noNA %>%
select(starts_with("VPApM")) %>%
# good idea to center data before pca, but scaling should not be necessary
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
# plot variance explained by each new principal component
plot(
(vpa.med.pos.full.pca$sdev ^ 2) * 100 / sum(vpa.med.pos.full.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nAll samples only\nVPA-only / Media / Positive Mode",
type = "b"
)
vpa.med.pos.full.pca.x <- as.data.frame(vpa.med.pos.full.pca$x)
row.names(vpa.med.pos.full.pca.x) <- vpa.med.pos.noNA$Samples
vpa.med.pos.full.pca.x <- vpa.med.pos.full.pca.x %>%
bind_cols(vpa.med.pos.noNA %>% select(Group:Experiment))
vpa.med.pos.full.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Group, shape = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (85.6% Var)") +
ylab("PC2 (6.3%)") +
ggtitle("Principal Component Analysis\nAll Samples\nVPA-only / Media / Positive Mode")
### Samples and QC ###
vpa.med.pos.smpl.QC.pca <- vpa.med.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(starts_with("VPApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.pos.smpl.QC.pca$sdev ^ 2) * 100 / sum(vpa.med.pos.smpl.QC.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nSamples and QC\nVPA-only / Media / Positive Mode",
type = "b"
)
vpa.med.pos.smpl.QC.pca.x <- as.data.frame(vpa.med.pos.smpl.QC.pca$x)
vpa.med.pos.smpl.QC.pca.x <- vpa.med.pos.smpl.QC.pca.x %>%
bind_cols(
vpa.med.pos.noNA %>%
filter(Group == "sample" | Group == "QC") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.pos.smpl.QC.pca.x) <- vpa.med.pos.smpl.QC.pca.x$Samples
vpa.med.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (72.1% Var)") +
ylab("PC2 (15.6%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Positive Mode")
vpa.med.pos.smpl.QC.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (6.0% Var)") +
ylab("PC4 (2.5%)") +
ggtitle("Principal Component Analysis\nSamples and QC\nVPA-only / Media / Positive Mode")
### Experimental Samples Only ###
vpa.med.pos.smpl.pca <- vpa.med.pos.noNA %>%
filter(Group == "sample") %>%
select(starts_with("VPApM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(
(vpa.med.pos.smpl.pca$sdev ^ 2) * 100 / sum(vpa.med.pos.smpl.pca$sdev ^ 2),
xlab = "Principal Component",
ylab = "Variance Explained",
main = "Percent variance explained by each principal component\nExperimental samples only\nVPA-only / Media / Positive Mode",
type = "b"
)
vpa.med.pos.smpl.pca.x <- as.data.frame(vpa.med.pos.smpl.pca$x)
vpa.med.pos.smpl.pca.x <- vpa.med.pos.smpl.pca.x %>%
bind_cols(
vpa.med.pos.noNA %>%
filter(Group == "sample") %>%
select(Samples, Group:Experiment)
)
row.names(vpa.med.pos.smpl.pca.x) <- vpa.med.pos.smpl.pca.x$Samples
vpa.med.pos.smpl.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (80.9% Var)") +
ylab("PC2 (11.2%)") +
ggtitle("Principal Component Analysis\nExperimental samples only\nVPA-only / Media / Positive Mode")
Is VPA metabolised by cells?
vpa.med.neg.noNA %>%
select(Samples:Experiment, VPAnM24) %>%
unite(Sample_Type, Group:Treatment, sep = "_") %>%
ggplot(aes(Sample_Type, VPAnM24)) +
geom_jitter(alpha = 0.8, width = 0.1)
vpa.med.neg.noNA %>%
select(Samples:Experiment, VPAnM24) %>%
unite(Sample_Type, Group:Treatment, sep = "_") %>%
ggplot(aes(Sample_Type, VPAnM24)) +
geom_boxplot()
vpa.med.neg.noNA %>%
select(Samples:Experiment, VPAnM24) %>%
filter(Treatment == "vpa") %>%
group_by(Group) %>%
summarize(
vpa.avg = mean(VPAnM24),
vpa.sd = sd(VPAnM24)
)
# A tibble: 2 x 3
Group vpa.avg vpa.sd
<chr> <dbl> <dbl>
1 empty 20.8 0.413
2 sample 20.8 0.506
It seems likely that VPA is not getting metabolised to a great extent, but cannot be sure.
# sample prep
vpa.cell.neg.smpl.data <- vpa.cell.neg.noNA %>%
filter(Group == "sample")
vpa.cell.neg.data.for.sva <- as.matrix(
vpa.cell.neg.smpl.data[, which(
colnames(vpa.cell.neg.smpl.data) == "VPAnC1"
):ncol(vpa.cell.neg.smpl.data)]
)
row.names(vpa.cell.neg.data.for.sva) <- vpa.cell.neg.smpl.data$Samples
vpa.cell.neg.data.for.sva <- t(vpa.cell.neg.data.for.sva)
# pheno prep
vpa.cell.neg.data.pheno <- as.data.frame(vpa.cell.neg.smpl.data[, 5:7])
row.names(vpa.cell.neg.data.pheno) <- vpa.cell.neg.smpl.data$Samples
# sva calculation
vpa.cell.neg.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.cell.neg.data.pheno)
vpa.cell.neg.mod0 <- model.matrix(~ 1, data = vpa.cell.neg.data.pheno)
set.seed(2018)
num.sv(vpa.cell.neg.data.for.sva, vpa.cell.neg.mod.vpa, method = "be")
[1] 3
set.seed(2018)
num.sv(vpa.cell.neg.data.for.sva, vpa.cell.neg.mod.vpa, method = "leek")
[1] 0
set.seed(2018)
vpa.cell.neg.sv <- sva(vpa.cell.neg.data.for.sva, vpa.cell.neg.mod.vpa, vpa.cell.neg.mod0)
Number of significant surrogate variables is: 3
Iteration (out of 5 ):1 2 3 4 5
# extract the surrogate variables
vpa.cell.neg.surr.var <- as.data.frame(vpa.cell.neg.sv$sv)
colnames(vpa.cell.neg.surr.var) <- c("S1", "S2", "S3")
vpa.cell.neg.covar <- vpa.cell.neg.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.cell.neg.data.pheno, vpa.cell.neg.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.cell.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, exp_plate:S3) %>%
gather("surr_var", "value", S1:S3) %>%
ggplot(aes(exp_plate, value, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1) +
facet_wrap(~ surr_var)
vpa.cell.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, Treatment:S3) %>%
gather("surr_var", "value", S1:S3) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ surr_var)
vpa.cell.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.cell.neg.covar %>%
select(PC1:PC4, S1:S3) %>%
ggcorr(label = TRUE)
vpa.cell.neg.covar %>%
select(PC1:PC4, S1:S3) %>%
ggpairs()
vpa.cell.neg.covar %>%
ggplot(aes(S3)) +
geom_histogram(bins = 50)
vpa.cell.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
vpa.cell.neg.covar %>%
ggplot(aes(run_order, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.neg.covar %>%
select(S1:run_order) %>%
ggpairs()
vpa.cell.neg.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(run_order, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.neg.covar %>%
ggplot(aes(run_order, prot_conc_ug_uL, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
colnames(vpa.cell.neg.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
# combine the full model matrix and the surrogate variables into one
vpa.cell.neg.d.sv <- cbind(vpa.cell.neg.mod.vpa, vpa.cell.neg.surr.var[, 1:2])
head(vpa.cell.neg.d.sv)
cntrl DRUGvsCNTRL S1 S2
T1_P1_C1 1 0 -0.02572670 0.30588286
T1_P1_C2 1 0 0.22693416 -0.04248658
T1_P1_C3 1 0 0.24683594 0.01195802
T1_P1_V1 1 1 0.26304093 0.05597521
T1_P1_V2 1 1 -0.08024336 0.19758531
T1_P1_V3 1 1 -0.08965598 0.26184963
vpa.cell.neg.top.table <- vpa.cell.neg.data.for.sva %>%
# fit a linear model
lmFit(vpa.cell.neg.d.sv) %>%
# calculate the test statistics
eBayes() %>%
# select the top features that have a p-value < 0.05 after Bonferroni multiple hypothesis correction
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.cell.neg.data.for.sva))
# what the result looks like:
head(vpa.cell.neg.top.table)
logFC AveExpr t P.Value adj.P.Val
VPAnC119 -1.1117931 16.34932 -16.036945 2.285612e-13 4.091246e-11
VPAnC114 0.9324087 15.08073 11.000854 3.019527e-10 5.404953e-08
VPAnC152 -1.3070183 16.93098 -10.520550 6.782678e-10 1.214099e-07
VPAnC148 -0.6677076 14.87538 -7.893929 9.235897e-08 1.653226e-05
VPAnC18 -0.6065030 21.20349 -7.811684 1.092767e-07 1.956052e-05
VPAnC44 0.5297000 22.56006 7.496710 2.098579e-07 3.756456e-05
B
VPAnC119 20.769050
VPAnC114 13.612185
VPAnC152 12.796738
VPAnC148 7.826527
VPAnC18 7.656136
VPAnC44 6.995121
# make result more informative
vpa.cell.neg.top.w.info <- vpa.cell.neg.top.table %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.cell.neg.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl))
vpa.cell.neg.top.w.info %>%
select(compound_short, compound_full, change_in_vpa, vpa_div_cntrl)
compound_short compound_full change_in_vpa
1 VPAnC58 N-Acetylserine down
2 VPAnC65 Xanthine down
3 VPAnC97 myo-Inositol down
4 VPAnC105 N-alpha-Acetylglutamine down
5 VPAnC41 Asparagine down
6 VPAnC23 Thiosulfate down
7 VPAnC46 Deoxyribose down
8 VPAnC135 Glycerol 1-phosphoserine down
9 VPAnC86 G3P down
10 VPAnC18 Glyceric Acid down
11 VPAnC148 Sedoheptulose 7-phosphate down
12 VPAnC136 D-Glucose 6-phosphate down
13 VPAnC172 Succinoadenosine down
14 VPAnC144 Xanthosine down
15 VPAnC119 Glycerylphosphorylethanolamine down
16 VPAnC10 Lactic Acid down
17 VPAnC176 dTDP down
18 VPAnC159 CMP down
19 VPAnC127 Ribose 5-Phosphate down
20 VPAnC152 GlcNAc-1P down
21 VPAnC48 Adenine down
22 VPAnC160 UMP down
23 VPAnC138 Inosine down
24 VPAnC168 AMP down
25 VPAnC169 GMP down
26 VPAnC114 Homocitric Acid up
27 VPAnC54 Valproic acid up
28 VPAnC200 ADP-Glucose up
29 VPAnC53 O-Phosphoethanolamine up
30 VPAnC157 11Z,14Z-Eicosadienoic Acid (20:2 n-6) up
31 VPAnC189 CTP up
32 VPAnC194 GTP up
33 VPAnC67 3-Sulfinoalanine up
34 VPAnC193 ATP up
35 VPAnC73 2-Aminoadipic Acid up
36 VPAnC44 Aspartic Acid up
37 VPAnC124 Cystathionine up
38 VPAnC190 UTP up
39 VPAnC17 Serine up
40 VPAnC30 Threonine up
41 VPAnC2 Glycine up
vpa_div_cntrl
1 0.8006513
2 0.7733022
3 0.7727024
4 0.7642022
5 0.7475017
6 0.7258479
7 0.7200514
8 0.6898581
9 0.6742282
10 0.6567868
11 0.6295061
12 0.6155587
13 0.5263833
14 0.4899408
15 0.4627186
16 0.4458119
17 0.4260245
18 0.4086860
19 0.4066015
20 0.4041553
21 0.3971414
22 0.3471526
23 0.3070139
24 0.1705093
25 0.1357460
26 1.9084597
27 1.8537394
28 1.5939533
29 1.5623231
30 1.5330869
31 1.5075878
32 1.4845507
33 1.4760588
34 1.4597931
35 1.4558662
36 1.4436289
37 1.4029416
38 1.3951522
39 1.2483656
40 1.2386250
41 1.2121890
#write_csv(path = "./results/vpa_cell_neg_top_hits_w_FC_pval.csv", vpa.cell.neg.top.w.info)
vpa.cell.neg.gathered <- vpa.cell.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.neg.surr.var) %>%
select(Samples, Treatment, S1:S2, starts_with("VPAnC")) %>%
gather(key = "Compound", value = "Abundance", VPAnC1:VPAnC99)
# structure so far:
glimpse(vpa.cell.neg.gathered)
Observations: 4,117
Variables: 6
$ Samples <chr> "T1_P1_C1", "T1_P1_C2", "T1_P1_C3", "T1_P1_V1", "T1_...
$ Treatment <chr> "cntrl", "cntrl", "cntrl", "vpa", "vpa", "vpa", "cnt...
$ S1 <dbl> -0.02572670, 0.22693416, 0.24683594, 0.26304093, -0....
$ S2 <dbl> 0.30588286, -0.04248658, 0.01195802, 0.05597521, 0.1...
$ Compound <chr> "VPAnC1", "VPAnC1", "VPAnC1", "VPAnC1", "VPAnC1", "V...
$ Abundance <dbl> 15.08795, 14.67335, 14.81717, 14.55139, 14.79476, 15...
vpa.cell.neg.nested <- vpa.cell.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
# apply a linear model to each individual compound, as a function of the surrogate variables
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
# use broom to tidy up the output
mutate(augment_model = map(model, augment))
# result to far:
vpa.cell.neg.nested
# A tibble: 179 x 4
Compound data model augment_model
<chr> <list> <list> <list>
1 VPAnC1 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
2 VPAnC10 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
3 VPAnC100 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
4 VPAnC101 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
5 VPAnC102 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
6 VPAnC103 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
7 VPAnC104 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
8 VPAnC105 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
9 VPAnC106 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
10 VPAnC107 <tibble [23 x 5]> <S3: lm> <tibble [23 x 10]>
# ... with 169 more rows
# now to get the residuals out for each compound
vpa.cell.neg.modSV.resid <- vpa.cell.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
# return to long format
spread(Compound, .resid)
glimpse(vpa.cell.neg.modSV.resid[, 1:5])
Observations: 23
Variables: 5
$ Samples <chr> "T1_P1_C1", "T1_P1_C2", "T1_P1_C3", "T1_P1_V1", "T1_...
$ Treatment <chr> "cntrl", "cntrl", "cntrl", "vpa", "vpa", "vpa", "cnt...
$ VPAnC1 <dbl> 0.09809226, -0.06724331, 0.04638420, -0.24379320, -0...
$ VPAnC10 <dbl> 0.47952620, 0.79800463, 0.31534236, -1.11205044, -0....
$ VPAnC100 <dbl> 0.064964248, -0.108617277, 0.101704046, 0.098637074,...
vpa.cell.neg.modSV.resid %>%
select(Samples, one_of(vpa.cell.neg.top.w.info$compound_short)) %>%
HeatmapPrepAlt("VPAnC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA-Only Experiment / Cells / Negative Mode",
margins = c(50, 50, 75, 30),
labRow = vpa.cell.neg.top.w.info$compound_full,
k_col = 2, k_row = 2
)
### PCA - cleaned data ###
vpa.cell.neg.modSV.pca <- vpa.cell.neg.modSV.resid %>%
select(starts_with("VPAnC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
plot(vpa.cell.neg.modSV.pca$sdev^ 2 * 100 / sum(vpa.cell.neg.modSV.pca$sdev ^ 2))
vpa.cell.neg.modSV.pca.x <- as.data.frame(vpa.cell.neg.modSV.pca$x)
row.names(vpa.cell.neg.modSV.pca.x) <- vpa.cell.neg.modSV.resid$Samples
vpa.cell.neg.modSV.pca.x <- vpa.cell.neg.modSV.pca.x %>%
bind_cols(vpa.cell.neg.modSV.resid %>% select(Treatment))
vpa.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (54.1% Var)") +
ylab("PC2 (12.9% Var)")
vpa.cell.neg.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (8.4% Var)") +
ylab("PC4 (6.1% Var)")
vpa.cell.pos.smpl.data <- vpa.cell.pos.noNA %>%
filter(Group == "sample")
vpa.cell.pos.data.for.sva <- as.matrix(
vpa.cell.pos.smpl.data[, which(
colnames(vpa.cell.pos.smpl.data) == "VPApC1"
):ncol(vpa.cell.pos.smpl.data)]
)
row.names(vpa.cell.pos.data.for.sva) <- vpa.cell.pos.smpl.data$Samples
vpa.cell.pos.data.for.sva <- t(vpa.cell.pos.data.for.sva)
vpa.cell.pos.data.pheno <- as.data.frame(vpa.cell.pos.smpl.data[, 5:7])
row.names(vpa.cell.pos.data.pheno) <- vpa.cell.pos.smpl.data$Samples
# sva calculation
vpa.cell.pos.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.cell.pos.data.pheno)
vpa.cell.pos.mod0 <- model.matrix(~ 1, data = vpa.cell.pos.data.pheno)
set.seed(2018)
num.sv(vpa.cell.pos.data.for.sva, vpa.cell.pos.mod.vpa, method = "be")
[1] 2
set.seed(2018)
num.sv(vpa.cell.pos.data.for.sva, vpa.cell.pos.mod.vpa, method = "leek")
[1] 2
set.seed(2018)
vpa.cell.pos.sv <- sva(vpa.cell.pos.data.for.sva, vpa.cell.pos.mod.vpa, vpa.cell.pos.mod0)
Number of significant surrogate variables is: 2
Iteration (out of 5 ):1 2 3 4 5
vpa.cell.pos.surr.var <- as.data.frame(vpa.cell.pos.sv$sv)
colnames(vpa.cell.pos.surr.var) <- c("S1", "S2")
vpa.cell.pos.covar <- vpa.cell.pos.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.cell.pos.data.pheno, vpa.cell.pos.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.cell.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, exp_plate:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(exp_plate, value, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1) +
facet_wrap(~ surr_var)
vpa.cell.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples, Treatment:S2) %>%
gather("surr_var", "value", S1:S2) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ surr_var)
vpa.cell.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.cell.pos.covar %>%
select(PC1:PC4, S1:S2) %>%
ggcorr(label = TRUE)
vpa.cell.pos.covar %>%
select(PC1:PC4, S1:S2) %>%
ggpairs()
vpa.cell.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
vpa.cell.pos.covar %>%
ggplot(aes(run_order, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.cell.pos.covar %>%
select(S1:run_order) %>%
ggpairs()
vpa.cell.pos.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(run_order, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.cell.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, S2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
colnames(vpa.cell.pos.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
vpa.cell.pos.d.sv <- cbind(vpa.cell.pos.mod.vpa, vpa.cell.pos.surr.var)
head(vpa.cell.pos.d.sv)
cntrl DRUGvsCNTRL S1 S2
T1_P1_C1 1 0 -0.3187310 0.19917816
T1_P1_C2 1 0 -0.2279608 -0.06387899
T1_P1_C3 1 0 -0.1409242 0.01021199
T1_P1_V1 1 1 -0.2796968 -0.36213773
T1_P1_V2 1 1 -0.1999223 -0.13213535
T1_P1_V3 1 1 -0.2955297 0.16841744
vpa.cell.pos.top.table <- vpa.cell.pos.data.for.sva %>%
lmFit(vpa.cell.pos.d.sv) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.cell.pos.data.for.sva))
vpa.cell.pos.top.w.info <- vpa.cell.pos.top.table %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.cell.pos.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl))
vpa.cell.pos.top.w.info %>%
select(compound_short, compound_full, change_in_vpa, vpa_div_cntrl)
compound_short compound_full change_in_vpa
1 VPApC35 Asparagine down
2 VPApC40 N-Methylnicotinate down
3 VPApC51 Xanthine down
4 VPApC101 Glycerol 1-phosphoserine down
5 VPApC24 Nicotinamide down
6 VPApC49 D-Ribose down
7 VPApC89 GlcNAc down
8 VPApC67 G3P down
9 VPApC112 Xanthosine down
10 VPApC38 Adenine down
11 VPApC39 Hypoxanthine down
12 VPApC119 CMP down
13 VPApC14 Uracil down
14 VPApC134 ADP down
15 VPApC107 Inosine down
16 VPApC85 Glycerylphosphorylethanolamine down
17 VPApC60 Fucose down
18 VPApC95 Uridine down
19 VPApC74 D-Galactitol down
20 VPApC120 UMP down
21 VPApC121 NMN down
22 VPApC100 Glycerol-3-phosphocholine down
23 VPApC17 Proline up
24 VPApC37 Aspartic Acid up
25 VPApC52 3-Sulfinoalanine up
26 VPApC172 PC(36:4) up
27 VPApC159 ADP-Glucose up
28 VPApC41 O-Phosphoethanolamine up
29 VPApC56 Methyl-L-Lysine up
30 VPApC90 Cystathionine up
31 VPApC105 Thiamine up
32 VPApC171 PC(35:2) up
vpa_div_cntrl
1 0.8020385
2 0.7628304
3 0.7304178
4 0.6439406
5 0.6390530
6 0.6253119
7 0.6142356
8 0.6080481
9 0.5521731
10 0.4588743
11 0.4538572
12 0.4491053
13 0.4414234
14 0.4391555
15 0.4284642
16 0.4242119
17 0.4078264
18 0.4012096
19 0.3819708
20 0.3793175
21 0.3466167
22 0.1517526
23 1.8933351
24 1.5849782
25 1.5334580
26 1.4855587
27 1.4155297
28 1.3495668
29 1.3212269
30 1.2885595
31 1.2460124
32 1.2131600
#write_csv(path = "./results/vpa_cell_pos_top_hits_w_FC_pval.csv", vpa.cell.pos.top.w.info)
vpa.cell.pos.gathered <- vpa.cell.pos.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.cell.pos.surr.var) %>%
select(Samples, Treatment, S1:S2, starts_with("VPApC")) %>%
gather(key = "Compound", value = "Abundance", VPApC1:VPApC98)
vpa.cell.pos.nested <- vpa.cell.pos.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ S1 + S2, data = .))) %>%
mutate(augment_model = map(model, augment))
vpa.cell.pos.modSV.resid <- vpa.cell.pos.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
spread(Compound, .resid)
vpa.cell.pos.modSV.resid %>%
select(Samples, one_of(vpa.cell.pos.top.w.info$compound_short)) %>%
HeatmapPrepAlt("VPApC") %>%
t() %>%
heatmaply(
colors = viridis(n = 10, option = "magma"),
xlab = "Samples", ylab = "Compounds",
main = "Statistically significant compounds\nVPA-Only Experiment / Cells / Positive Mode",
margins = c(50, 50, 75, 30),
labRow = vpa.cell.pos.top.w.info$compound_full,
k_row = 2, k_col = 2
)
### PCA - cleaned data ###
vpa.cell.pos.modSV.pca <- vpa.cell.pos.modSV.resid %>%
select(starts_with("VPApC")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vpa.cell.pos.modSV.pca.x <- as.data.frame(vpa.cell.pos.modSV.pca$x)
row.names(vpa.cell.pos.modSV.pca.x) <- vpa.cell.pos.modSV.resid$Samples
vpa.cell.pos.modSV.pca.x <- vpa.cell.pos.modSV.pca.x %>%
bind_cols(vpa.cell.pos.modSV.resid %>% select(Treatment))
vpa.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (65.4% Var)") +
ylab("PC2 (12.4% Var)")
vpa.cell.pos.modSV.pca.x %>%
ggplot(aes(x = PC3, y = PC4, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC3 (6.5% Var)") +
ylab("PC4 (4.5% Var)")
vpa.med.neg.smpl.data <- vpa.med.neg.noNA %>%
filter(Group == "sample")
vpa.med.neg.data.for.sva <- as.matrix(
vpa.med.neg.smpl.data[, which(
colnames(vpa.med.neg.smpl.data) == "VPAnM1"
):ncol(vpa.med.neg.smpl.data)]
)
row.names(vpa.med.neg.data.for.sva) <- vpa.med.neg.smpl.data$Samples
vpa.med.neg.data.for.sva <- t(vpa.med.neg.data.for.sva)
vpa.med.neg.data.pheno <- as.data.frame(vpa.med.neg.smpl.data[, 5:7])
row.names(vpa.med.neg.data.pheno) <- vpa.med.neg.smpl.data$Samples
vpa.med.neg.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.med.neg.data.pheno)
vpa.med.neg.mod0 <- model.matrix(~ 1, data = vpa.med.neg.data.pheno)
set.seed(2018)
num.sv(vpa.med.neg.data.for.sva, vpa.med.neg.mod.vpa, method = "be")
[1] 1
set.seed(2018)
num.sv(vpa.med.neg.data.for.sva, vpa.med.neg.mod.vpa, method = "leek")
[1] 0
set.seed(2018)
vpa.med.neg.sv <- sva(vpa.med.neg.data.for.sva, vpa.med.neg.mod.vpa, vpa.med.neg.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
vpa.med.neg.surr.var <- as.data.frame(vpa.med.neg.sv$sv)
colnames(vpa.med.neg.surr.var) <- c("S1")
vpa.med.neg.covar <- vpa.med.neg.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.med.neg.data.pheno, vpa.med.neg.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.med.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1)
vpa.med.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment))
vpa.med.neg.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.med.neg.covar %>%
select(PC1:PC4, S1) %>%
ggcorr(label = TRUE)
vpa.med.neg.covar %>%
select(PC1:PC4, S1) %>%
ggpairs()
vpa.med.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.med.neg.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
vpa.med.neg.covar %>%
ggplot(aes(run_order, PC3, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.neg.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.med.neg.covar %>%
select(S1:run_order) %>%
ggpairs()
vpa.med.neg.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.neg.covar %>%
ggplot(aes(prot_conc_ug_uL, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.neg.pca.var <- data.frame(vpa.med.neg.smpl.pca.x$PC3)
colnames(vpa.med.neg.pca.var) <- "PC3"
setequal(row.names(vpa.med.neg.smpl.pca.x), row.names(vpa.med.neg.mod.vpa))
[1] TRUE
head(vpa.med.neg.pca.var)
PC3
1 2.0980413
2 1.2398362
3 0.9684519
4 1.3396242
5 1.0965746
6 0.8120326
colnames(vpa.med.neg.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
vpa.med.neg.d.sv <- cbind(vpa.med.neg.mod.vpa, vpa.med.neg.pca.var)
head(vpa.med.neg.d.sv)
cntrl DRUGvsCNTRL PC3
T1_P1_C1 1 0 2.0980413
T1_P1_C2 1 0 1.2398362
T1_P1_C3 1 0 0.9684519
T1_P1_V1 1 1 1.3396242
T1_P1_V2 1 1 1.0965746
T1_P1_V3 1 1 0.8120326
vpa.med.neg.top.table <- vpa.med.neg.data.for.sva %>%
lmFit(vpa.med.neg.d.sv) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.med.neg.data.for.sva))
vpa.med.neg.top.w.info <- vpa.med.neg.top.table %>%
rownames_to_column("compound_short") %>%
mutate(
vpa_div_cntrl = 2 ^ logFC,
change_in_vpa = ifelse(vpa_div_cntrl < 1, "down", "up")
) %>%
inner_join(vpa.med.neg.compound.info, by = "compound_short") %>%
filter(vpa_div_cntrl > 1.2 | vpa_div_cntrl < 0.83) %>%
arrange(change_in_vpa, desc(vpa_div_cntrl))
vpa.med.neg.top.w.info %>%
select(compound_short, compound_full, change_in_vpa, vpa_div_cntrl)
compound_short compound_full change_in_vpa vpa_div_cntrl
1 VPAnM24 Valproic acid up 35.71479
vpa.med.neg.gathered <- vpa.med.neg.noNA %>%
filter(Group == "sample") %>%
bind_cols(vpa.med.neg.pca.var) %>%
select(Samples, Treatment, PC3, starts_with("VPAnM")) %>%
gather(key = "Compound", value = "Abundance", VPAnM1:VPAnM9)
vpa.med.neg.nested <- vpa.med.neg.gathered %>%
group_by(Compound) %>%
nest() %>%
mutate(model = map(data, ~lm(Abundance ~ PC3, data = .))) %>%
mutate(augment_model = map(model, augment))
vpa.med.neg.modSV.resid <- vpa.med.neg.nested %>%
unnest(data, augment_model) %>%
select(Samples, Treatment, Compound, .resid) %>%
spread(Compound, .resid)
vpa.med.neg.modSV.resid %>%
select(Samples:Treatment, one_of(vpa.med.neg.top.w.info$compound_short)) %>%
ggplot(aes(Treatment, VPAnM24)) +
geom_boxplot() +
geom_jitter(size = 3, alpha = 0.8, aes(color = Treatment))
### PCA - cleaned data ###
vpa.med.neg.modSV.pca <- vpa.med.neg.modSV.resid %>%
select(starts_with("VPAnM")) %>%
mutate_all(scale, center = TRUE, scale = FALSE) %>%
as.matrix() %>%
prcomp()
vpa.med.neg.modSV.pca.x <- as.data.frame(vpa.med.neg.modSV.pca$x)
row.names(vpa.med.neg.modSV.pca.x) <- vpa.med.neg.modSV.resid$Samples
vpa.med.neg.modSV.pca.x <- vpa.med.neg.modSV.pca.x %>%
bind_cols(vpa.med.neg.modSV.resid %>% select(Treatment))
vpa.med.neg.modSV.pca.x %>%
ggplot(aes(x = PC1, y = PC2, color = Treatment)) +
geom_point(size = 4, alpha = 0.8) +
xlab("PC1 (63.4% Var)") +
ylab("PC2 (30.0% Var)")
vpa.med.pos.smpl.data <- vpa.med.pos.noNA %>%
filter(Group == "sample")
vpa.med.pos.data.for.sva <- as.matrix(
vpa.med.pos.smpl.data[, which(
colnames(vpa.med.pos.smpl.data) == "VPApM1"
):ncol(vpa.med.pos.smpl.data)]
)
row.names(vpa.med.pos.data.for.sva) <- vpa.med.pos.smpl.data$Samples
vpa.med.pos.data.for.sva <- t(vpa.med.pos.data.for.sva)
vpa.med.pos.data.pheno <- as.data.frame(vpa.med.pos.smpl.data[, 5:7])
row.names(vpa.med.pos.data.pheno) <- vpa.med.pos.smpl.data$Samples
vpa.med.pos.mod.vpa <- model.matrix(~ as.factor(Treatment), data = vpa.med.pos.data.pheno)
vpa.med.pos.mod0 <- model.matrix(~ 1, data = vpa.med.pos.data.pheno)
set.seed(2018)
num.sv(vpa.med.pos.data.for.sva, vpa.med.pos.mod.vpa, method = "be")
[1] 1
set.seed(2018)
num.sv(vpa.med.pos.data.for.sva, vpa.med.pos.mod.vpa, method = "leek")
[1] 0
set.seed(2018)
vpa.med.pos.sv <- sva(vpa.med.pos.data.for.sva, vpa.med.pos.mod.vpa, vpa.med.pos.mod0)
Number of significant surrogate variables is: 1
Iteration (out of 5 ):1 2 3 4 5
vpa.med.pos.surr.var <- as.data.frame(vpa.med.pos.sv$sv)
colnames(vpa.med.pos.surr.var) <- c("S1")
vpa.med.pos.covar <- vpa.med.pos.smpl.pca.x %>%
select(Samples, PC1:PC4) %>%
inner_join(
cbind(vpa.med.pos.data.pheno, vpa.med.pos.surr.var) %>%
rownames_to_column("Samples"),
by = "Samples"
) %>%
full_join(vpa.cell.other.info, by = "Samples") %>%
as_tibble()
vpa.med.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1, color = exp_plate)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1)
vpa.med.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
ggplot(aes(exp_plate, S1)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment))
vpa.med.pos.covar %>%
unite("exp_plate", Experiment, Plate, sep = "_") %>%
select(Samples:exp_plate) %>%
gather("PC", "value", PC1:PC4) %>%
ggplot(aes(exp_plate, value)) +
geom_boxplot() +
geom_jitter(size = 2, alpha = 0.8, width = 0.1, aes(color = Treatment)) +
facet_wrap(~ PC, scales = "free")
vpa.med.pos.covar %>%
select(PC1:PC4, S1) %>%
ggcorr(label = TRUE)
vpa.med.pos.covar %>%
select(PC1:PC4, S1) %>%
ggpairs()
vpa.med.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggcorr(label = TRUE)
vpa.med.pos.covar %>%
select(PC1:PC4, prot_conc_ug_uL:run_order) %>%
ggpairs()
vpa.med.pos.covar %>%
ggplot(aes(run_order, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, PC2, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.pos.covar %>%
select(S1:run_order) %>%
ggcorr(label = TRUE)
vpa.med.pos.covar %>%
select(S1:run_order) %>%
ggpairs()
vpa.med.pos.covar %>%
ggplot(aes(run_order, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.pos.covar %>%
ggplot(aes(prot_conc_ug_uL, S1, color = Treatment, shape = Experiment)) +
geom_point(size = 3, alpha = 0.8)
vpa.med.pos.pca.var <- data.frame(vpa.med.pos.smpl.pca.x$PC2)
colnames(vpa.med.pos.pca.var) <- "PC2"
setequal(row.names(vpa.med.pos.smpl.pca.x), row.names(vpa.med.pos.mod.vpa))
[1] TRUE
head(vpa.med.pos.pca.var)
PC2
1 -1.8760311
2 -1.9221080
3 -1.5321371
4 -1.0724871
5 -0.6100211
6 -1.1385369
colnames(vpa.med.pos.mod.vpa) <- c("cntrl", "DRUGvsCNTRL")
vpa.med.pos.d.sv <- cbind(vpa.med.pos.mod.vpa, vpa.med.pos.pca.var)
head(vpa.med.pos.d.sv)
cntrl DRUGvsCNTRL PC2
T1_P1_C1 1 0 -1.8760311
T1_P1_C2 1 0 -1.9221080
T1_P1_C3 1 0 -1.5321371
T1_P1_V1 1 1 -1.0724871
T1_P1_V2 1 1 -0.6100211
T1_P1_V3 1 1 -1.1385369
vpa.med.pos.top.table <- vpa.med.pos.data.for.sva %>%
lmFit(vpa.med.pos.d.sv) %>%
eBayes() %>%
topTable(coef = "DRUGvsCNTRL", adjust = "bonferroni", p.value = 0.05, n = nrow(vpa.med.pos.data.for.sva))
vpa.med.pos.top.table
data frame with 0 columns and 0 rows
head(vpa.cell.neg.top.w.info)
compound_short logFC AveExpr t P.Value adj.P.Val
1 VPAnC58 -0.3207540 17.48468 -4.615315 1.445584e-04 0.0258759505
2 VPAnC65 -0.3708957 20.56905 -4.826636 8.713540e-05 0.0155972362
3 VPAnC97 -0.3720153 19.32692 -7.495251 2.104994e-07 0.0000376794
4 VPAnC105 -0.3879737 15.24568 -4.509934 1.861965e-04 0.0333291767
5 VPAnC41 -0.4198513 18.18660 -6.550547 1.613484e-06 0.0002888137
6 VPAnC23 -0.4622609 18.27583 -6.245694 3.190768e-06 0.0005711475
B vpa_div_cntrl change_in_vpa compound_full
1 0.4099156 0.8006513 down N-Acetylserine
2 0.9152870 0.7733022 down Xanthine
3 6.9920292 0.7727024 down myo-Inositol
4 0.1577535 0.7642022 down N-alpha-Acetylglutamine
5 4.9304963 0.7475017 down Asparagine
6 4.2413631 0.7258479 down Thiosulfate
formula mass rt cas_id
1 C5 H9 N O4 147.054 3.38 16354-58-8
2 C5 H4 N4 O2 152.034 1.92 69-89-6
3 C6 H12 O6 180.064 5.02 87-89-8
4 C7 H12 N2 O4 188.079 5.83 2490-97-3
5 C4 H8 N2 O3 132.054 8.37 70-47-3
6 H2 O3 S2 113.945 4.85 14383-50-7
head(vpa.cell.pos.top.w.info)
compound_short logFC AveExpr t P.Value adj.P.Val
1 VPApC35 -0.3182566 18.67222 -4.853699 7.687773e-05 0.010916637
2 VPApC40 -0.3905657 15.41180 -4.621143 1.350775e-04 0.019181004
3 VPApC51 -0.4532062 16.91997 -4.489349 1.860706e-04 0.026422023
4 VPApC101 -0.6350004 14.04687 -5.753222 8.991813e-06 0.001276837
5 VPApC24 -0.6459926 19.38535 -4.275739 3.128901e-04 0.044430397
6 VPApC49 -0.6773521 14.61980 -4.311187 2.870291e-04 0.040758128
B vpa_div_cntrl change_in_vpa compound_full
1 0.58407812 0.8020385 down Asparagine
2 0.01844396 0.7628304 down N-Methylnicotinate
3 -0.30211613 0.7304178 down Xanthine
4 2.75153845 0.6439406 down Glycerol 1-phosphoserine
5 -0.82081936 0.6390530 down Nicotinamide
6 -0.73485577 0.6253119 down D-Ribose
formula mass rt cas_id
1 C4 H8 N2 O3 132.054 8.39 70-47-3
2 C7 H7 N O2 137.048 10.39 535-83-1
3 C5 H4 N4 O2 152.034 1.92 69-89-6
4 C6 H14 N O8 P 259.045 8.00 26289-09-8
5 C6 H6 N2 O 122.048 1.90 98-92-0
6 C5 H10 O5 150.052 2.45 50-69-1
head(vpa.med.neg.top.w.info)
compound_short logFC AveExpr t P.Value adj.P.Val
1 VPAnM24 5.15845 18.25088 29.61713 1.105853e-22 4.533997e-21
B vpa_div_cntrl change_in_vpa compound_full formula mass rt
1 41.71555 35.71479 up Valproic acid C8 H16 O2 144.115 1.3
cas_id
1 99-66-1
vpa.full.hit.list <- vpa.cell.neg.top.w.info %>%
mutate(Mode = "cell.neg") %>%
bind_rows(
vpa.cell.pos.top.w.info %>%
mutate(Mode = "cell.pos")
) %>%
bind_rows(
vpa.med.neg.top.w.info %>%
mutate(Mode = "med.neg")
) %>%
as.tibble()
vpa.sml.hit.list <- vpa.full.hit.list %>%
select(compound_full:formula, cas_id) %>%
distinct() %>%
arrange(compound_full) %>%
left_join(cmpnd.id.db, by = "cas_id")
all.equal(vpa.sml.hit.list$compound_full.x, vpa.sml.hit.list$compound_full.y)
[1] TRUE
#write_csv(path = "./results/vpa_only_exp_hits_w_kegg_sv_mod.csv", vpa.sml.hit.list)
glimpse(vpa.sml.hit.list)
Observations: 58
Variables: 7
$ compound_full.x <chr> "11Z,14Z-Eicosadienoic Acid (20:2 n-6)", "2-Am...
$ formula <chr> "C20 H36 O2", "C6 H11 N O4", "C3 H7 N O4 S", "...
$ cas_id <chr> "2091-39-6", "542-32-5", "1115-65-7", "73-24-5...
$ compound_full.y <chr> "11Z,14Z-Eicosadienoic Acid (20:2 n-6)", "2-Am...
$ HMDB <chr> "HMDB05060", "HMDB00510", "HMDB00996", "HMDB00...
$ Metlin <chr> "62964", "3271", "5927", "85", "34522", "63203...
$ KEGG <chr> "C16525", "C00956", "C00606", "C00147", "C0000...
nucleotide.metab <- read_csv("./data/pathways/vpa_only_exp_nucleotide_hits.csv")
### Purine Metabolism ###
purine.triphosphates <- c("ATP", "GTP", "dATP", "dGTP")
purine.for.plot <- nucleotide.metab %>%
inner_join(vpa.full.hit.list, by = "cas_id") %>%
filter(path1 == "Purine" | path2 == "Purine" | path3 == "Purine") %>%
select(compound_full.x, compound_short) %>%
rename(compound_full = compound_full.x) %>%
bind_rows(
vpa.cell.neg.compound.info %>%
filter(compound_full %in% purine.triphosphates) %>%
select(compound_full, compound_short)
) %>%
bind_rows(
vpa.cell.pos.compound.info %>%
filter(compound_full %in% purine.triphosphates) %>%
select(compound_full, compound_short)
)
#write_csv(path = "./data/pathways/purine_pathway.csv", purine.for.plot) do not run
purine.plot.order <- read_csv("./data/pathways/purine_pathway.csv") %>%
mutate(plot_order = factor(Name, levels = Name))
purine.data <- vpa.cell.neg.modSV.resid %>%
inner_join(vpa.cell.pos.modSV.resid %>% select(-Treatment), by = "Samples") %>%
select(Samples:Treatment, one_of(purine.for.plot$compound_short)) %>%
mutate_at(vars(matches("VPA")), scale)
purine.sig.plot <- purine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(
purine.plot.order %>%
filter(compound_full != "Aspartic Acid" & compound_full != "Ribose 5-Phosphate"),
by = "compound_short"
) %>%
filter(Sig == "Y") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
# ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nStatistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
purine.sig.plot
purine.not.sig.plot <- purine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(purine.plot.order, by = "compound_short") %>%
filter(Sig == "N") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nNot Statistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
purine.not.sig.plot
### Pyrimidine Metabolism ###
pyrimidine.triphosphates <- c("UTP", "CTP", "dTTP", "dCTP")
pyrimidine.for.plot <- nucleotide.metab %>%
inner_join(vpa.full.hit.list, by = "cas_id") %>%
filter(path1 == "Pyrimidine" | path2 == "Pyrimidine" | path3 == "Pyrimidine") %>%
select(compound_full.x, compound_short) %>%
rename(compound_full = compound_full.x) %>%
bind_rows(
vpa.cell.neg.compound.info %>%
filter(compound_full %in% pyrimidine.triphosphates) %>%
select(compound_full, compound_short)
) %>%
bind_rows(
vpa.cell.pos.compound.info %>%
filter(compound_full %in% pyrimidine.triphosphates) %>%
select(compound_full, compound_short)
) %>%
distinct()
#write_csv(path = "./data/pathways/pyrimidine_pathway.csv", pyrimidine.for.plot)
pyrimidine.plot.order <- read_csv("./data/pathways/pyrimidine_pathway.csv") %>%
mutate(plot_order = factor(Name, levels = Name))
pyrimidine.data <- vpa.cell.neg.modSV.resid %>%
inner_join(vpa.cell.pos.modSV.resid %>% select(-Treatment), by = "Samples") %>%
select(Samples:Treatment, one_of(pyrimidine.for.plot$compound_short)) %>%
mutate_at(vars(matches("VPA")), scale)
pyrimidine.sig.plot <- pyrimidine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(
pyrimidine.plot.order %>%
filter(compound_full != "Ribose 5-Phosphate"), by = "compound_short") %>%
filter(Sig == "Y") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nStatistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
pyrimidine.sig.plot
pyrimidine.not.sig.plot <- pyrimidine.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(pyrimidine.plot.order, by = "compound_short") %>%
filter(Sig == "N") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90")
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nPyrimidine Metabolism\nNot Statistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.5, 2.5)
pyrimidine.not.sig.plot
### Pentose Phosphate Pathway ###
ppp.for.plot <- nucleotide.metab %>%
inner_join(vpa.full.hit.list, by = "cas_id") %>%
filter(path1 == "PPP" | path2 == "PPP" | path3 == "PPP") %>%
select(compound_full.x, compound_short) %>%
rename(compound_full = compound_full.x)
#write_csv(path = "./data/pathways/ppp_pathway.csv", ppp.for.plot)
ppp.plot.order <- read_csv("./data/pathways/ppp_pathway.csv") %>%
mutate(plot_order = factor(Name, levels = Name))
ppp.data <- vpa.cell.neg.modSV.resid %>%
inner_join(vpa.cell.pos.modSV.resid %>% select(-Treatment), by = "Samples") %>%
select(Samples:Treatment, one_of(ppp.for.plot$compound_short)) %>%
mutate_at(vars(matches("VPA")), scale)
ppp.sig.plot <- ppp.data %>%
gather(key = "compound_short", value = "Abundance", -Samples, -Treatment) %>%
inner_join(ppp.plot.order, by = "compound_short") %>%
ggplot(aes(plot_order, Abundance, color = Treatment)) +
geom_boxplot(position = position_dodge(0.8)) +
geom_jitter(size = 2, alpha = 0.5, position = position_dodge(0.8)) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
panel.background = element_rect(fill = "gray90"),
legend.justification = "top"
) +
xlab("") +
ylab("") +
#ggtitle("Normalized Abundance by Treatment Group\nppp Metabolism\nStatistically Significant Compounds") +
scale_color_manual(values = c("#56B4E9", "#E69F00"), labels = c("Control", "VPA")) +
ylim(-2.75, 2.5) +
scale_x_discrete(labels = c("Glucose\n6-Phosphate", "Ribose", "Deoxyribose", "Glyceraldehyde\n3-Phosphate (Neg)", "Glyceraldehyde\n3-Phosphate (Pos)", "Ribose\n5-Phosphate", "Sedoheptulose\n7-Phosphate"))
ppp.sig.plot
### all together
nuc.legend <- get_legend(ppp.sig.plot)
vpa.only.nuc.sig.plot.grid <- plot_grid(
purine.sig.plot + theme(legend.position = "none", plot.margin = unit(c(6,6,0,0), "pt")),
pyrimidine.sig.plot + theme(legend.position = "none", plot.margin = unit(c(0,6,0,0), "pt")),
plot_grid(
ppp.sig.plot + theme(legend.position = "none", plot.margin = unit(c(0,6,0,0), "pt")),
nuc.legend,
rel_widths = c(1.45, 0.55)
),
ncol = 1, labels = c("A", "B", "C"),
rel_heights = c(1, 1, 1)
)
vpa.only.nuc.sig.plot.grid
#save_plot("./results/vpa_only_exp_nuc_sig.png", vpa.only.nuc.sig.plot.grid, base_height = 10, base_width = 8)
nuc.not.sig.row.plots <- plot_grid(
purine.not.sig.plot,
pyrimidine.not.sig.plot,
labels = c("A", "B"),
ncol = 1
)
nuc.not.sig.row.plots
#save_plot("./results/vpa_only_exp_nuc_not_sig.png", nuc.not.sig.row.plots, base_width = 8, base_height = 8)
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 7 x64 (build 7601) Service Pack 1
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] bindrcpp_0.2.2 ggridges_0.5.1 broom_0.5.0
[4] limma_3.36.5 sva_3.28.0 BiocParallel_1.14.2
[7] genefilter_1.62.0 mgcv_1.8-25 nlme_3.1-137
[10] heatmaply_0.15.2 viridis_0.5.1 viridisLite_0.3.0
[13] plotly_4.8.0 GGally_1.4.0 cowplot_0.9.3
[16] forcats_0.3.0 stringr_1.3.1 dplyr_0.7.8
[19] purrr_0.2.5 readr_1.1.1 tidyr_0.8.2
[22] tibble_1.4.2 ggplot2_3.1.0 tidyverse_1.2.1
loaded via a namespace (and not attached):
[1] colorspace_1.3-2 class_7.3-14 modeltools_0.2-22
[4] mclust_5.4.1 rprojroot_1.3-2 rstudioapi_0.8
[7] flexmix_2.3-14 bit64_0.9-7 fansi_0.4.0
[10] AnnotationDbi_1.42.1 mvtnorm_1.0-8 lubridate_1.7.4
[13] xml2_1.2.0 splines_3.5.1 codetools_0.2-15
[16] robustbase_0.93-3 knitr_1.20 jsonlite_1.5
[19] annotate_1.58.0 cluster_2.0.7-1 kernlab_0.9-27
[22] shiny_1.2.0 compiler_3.5.1 httr_1.3.1
[25] backports_1.1.2 assertthat_0.2.0 Matrix_1.2-15
[28] lazyeval_0.2.1 cli_1.0.1 later_0.7.5
[31] htmltools_0.3.6 tools_3.5.1 gtable_0.2.0
[34] glue_1.3.0 reshape2_1.4.3 Rcpp_1.0.0
[37] Biobase_2.40.0 cellranger_1.1.0 trimcluster_0.1-2.1
[40] gdata_2.18.0 crosstalk_1.0.0 iterators_1.0.10
[43] fpc_2.1-11.1 rvest_0.3.2 mime_0.6
[46] gtools_3.8.1 XML_3.98-1.16 dendextend_1.9.0
[49] DEoptimR_1.0-8 MASS_7.3-51.1 scales_1.0.0
[52] TSP_1.1-6 promises_1.0.1 hms_0.4.2
[55] parallel_3.5.1 RColorBrewer_1.1-2 yaml_2.2.0
[58] memoise_1.1.0 gridExtra_2.3 reshape_0.8.8
[61] stringi_1.2.4 RSQLite_2.1.1 gclus_1.3.1
[64] S4Vectors_0.18.3 foreach_1.4.4 seriation_1.2-3
[67] caTools_1.17.1.1 BiocGenerics_0.26.0 matrixStats_0.54.0
[70] rlang_0.3.0.1 pkgconfig_2.0.2 prabclus_2.2-6
[73] bitops_1.0-6 evaluate_0.12 lattice_0.20-38
[76] bindr_0.1.1 labeling_0.3 htmlwidgets_1.3
[79] bit_1.1-14 tidyselect_0.2.5 plyr_1.8.4
[82] magrittr_1.5 R6_2.3.0 IRanges_2.14.12
[85] gplots_3.0.1 DBI_1.0.0 pillar_1.3.0
[88] haven_1.1.2 whisker_0.3-2 withr_2.1.2
[91] survival_2.43-1 RCurl_1.95-4.11 nnet_7.3-12
[94] modelr_0.1.2 crayon_1.3.4 utf8_1.1.4
[97] KernSmooth_2.23-15 rmarkdown_1.10 grid_3.5.1
[100] readxl_1.1.0 data.table_1.11.8 blob_1.1.1
[103] digest_0.6.18 diptest_0.75-7 webshot_0.5.1
[106] xtable_1.8-3 httpuv_1.4.5 stats4_3.5.1
[109] munsell_0.5.0 registry_0.5